{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import ast\n",
    "import json\n",
    "import os\n",
    "\n",
    "from promptify.models.nlp.openai_model import OpenAI\n",
    "from promptify.prompts.nlp.prompter import Prompter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the API key from the environment so a real key is never saved in the\n",
    "# notebook; the placeholder remains as a fallback when the variable is unset.\n",
    "model = OpenAI(api_key=os.environ.get(\"OPENAI_API_KEY\", \"YOUR_API_KEY\"))\n",
    "nlp_prompter = Prompter(model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the sample set inside a context manager so the file handle is closed\n",
    "# as soon as parsing finishes; JSON text is read explicitly as UTF-8.\n",
    "with open(\"data/tabular.json\", \"r\", encoding=\"utf-8\") as samples_file:\n",
    "    data = json.load(samples_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "10"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of entries under the \"samples\" key of the loaded dataset.\n",
    "len(data[\"samples\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'Text': 'John Doe, a 32-year-old engineer, can be reached at johndoe@email.com.', 'Tabular Data': {'name': 'John Doe', 'age': 32, 'occupation': 'Engineer', 'email': 'johndoe@email.com'}} \n",
      "\n",
      "{'Text': 'The latest iPhone, the XS Max, is priced at $999 and comes in a 128GB version with a gold finish.', 'Tabular Data': {'product': 'iPhone', 'model': 'XS Max', 'price': 999, 'storage': '128GB', 'color': 'Gold'}} \n",
      "\n",
      "{'Text': 'Sarah Johnson, a 28-year-old teacher, can be contacted at sarahj@email.com.', 'Tabular Data': {'name': 'Sarah Johnson', 'age': 28, 'occupation': 'Teacher', 'email': 'sarahj@email.com'}} \n",
      "\n",
      "{'Text': 'The new MacBook Pro, featuring the M1 chip, costs $1999 and comes with 16GB of memory in a silver finish.', 'Tabular Data': {'product': 'MacBook Pro', 'model': 'M1', 'price': 1999, 'storage': '16GB', 'color': 'Silver'}} \n",
      "\n",
      "{'Text': 'Dr. Michael Smith can be reached at michaels@email.com.', 'Tabular Data': {'name': 'Michael Smith', 'occupation': 'Doctor', 'email': 'michaels@email.com'}} \n",
      "\n"
     ]
    }
   ],
   "source": [
    "# The first five samples are shown and then used as few-shot examples.\n",
    "few_shot_samples = data[\"samples\"][:5]\n",
    "for record in few_shot_samples:\n",
    "    print(record, \"\\n\")\n",
    "\n",
    "# Each example is a (text, tabular data) pair taken from one sample.\n",
    "examples = [(record[\"Text\"], record[\"Tabular Data\"]) for record in few_shot_samples]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Tabular Data Extraction\n",
      "You are highly intelligent and accurate tabular data extractor from plain text input, your inputs can be text of arbitrary size but the output should be in JSON format.\n",
      "\n",
      "Examples:\n",
      "\n",
      "\n",
      "Input: John Doe, a 32-year-old engineer, can be reached at johndoe@email.com.\n",
      "Output: [{'Tabular': '{'name': 'John Doe', 'age': 32, 'occupation': 'Engineer', 'email': 'johndoe@email.com'}' }]\n",
      "\n",
      "Input: The latest iPhone, the XS Max, is priced at $999 and comes in a 128GB version with a gold finish.\n",
      "Output: [{'Tabular': '{'product': 'iPhone', 'model': 'XS Max', 'price': 999, 'storage': '128GB', 'color': 'Gold'}' }]\n",
      "\n",
      "Input: Sarah Johnson, a 28-year-old teacher, can be contacted at sarahj@email.com.\n",
      "Output: [{'Tabular': '{'name': 'Sarah Johnson', 'age': 28, 'occupation': 'Teacher', 'email': 'sarahj@email.com'}' }]\n",
      "\n",
      "Input: The new MacBook Pro, featuring the M1 chip, costs $1999 and comes with 16GB of memory in a silver finish.\n",
      "Output: [{'Tabular': '{'product': 'MacBook Pro', 'model': 'M1', 'price': 1999, 'storage': '16GB', 'color': 'Silver'}' }]\n",
      "\n",
      "Input: Dr. Michael Smith can be reached at michaels@email.com.\n",
      "Output: [{'Tabular': '{'name': 'Michael Smith', 'occupation': 'Doctor', 'email': 'michaels@email.com'}' }]\n",
      "\n",
      "\n",
      "Input: Emily Davis, a 31-year-old lawyer, can be reached at emilyd@email.com.\n",
      "Output:\n"
     ]
    }
   ],
   "source": [
    "# The sample at index 6 lies outside the [:5] slice used for the examples,\n",
    "# so its text is used as the query; the generated prompt is printed below.\n",
    "query_text = data[\"samples\"][6][\"Text\"]\n",
    "prompt = nlp_prompter.generate_prompt(\n",
    "    \"tabular_extractor.jinja\",\n",
    "    examples=examples,\n",
    "    text_input=query_text,\n",
    "    description=\"Tabular Data Extraction\",\n",
    ")\n",
    "print(prompt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Call fit() with the same template, examples and query text (sample index 6),\n",
    "# naming the model to use; the result is kept in `output`.\n",
    "query_text = data[\"samples\"][6][\"Text\"]\n",
    "output = nlp_prompter.fit(\n",
    "    \"tabular_extractor.jinja\",\n",
    "    examples=examples,\n",
    "    text_input=query_text,\n",
    "    model_name=\"text-davinci-003\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'Tabular': {'name': 'Emily Davis',\n",
       "   'age': 31,\n",
       "   'occupation': 'Lawyer',\n",
       "   'email': 'emilyd@email.com'}}]"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The few-shot examples wrap the table in quotes ('{...}'); strip those quotes\n",
    "# so the returned text reads as a plain Python literal. The result is stored\n",
    "# under its own name so the dataset held in `data` is not overwritten.\n",
    "extracted_literal = output['text'].replace(\"'{\", \"{\").replace(\"}'\", \"}\")\n",
    "\n",
    "# ast.literal_eval accepts only literals (dicts, lists, strings, numbers, ...),\n",
    "# so text returned by the model is parsed without ever being executed as code.\n",
    "ast.literal_eval(extracted_literal)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "promptify_env",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "05c175b628f3488cd9e74cf2f743838a9e9599c12dfcb982576e5b290ede1498"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
